Optimize path gc#777
Conversation
kangpinghuang
commented
Mar 19, 2019
- move path gc to DataDir
- use condition variable to coordinate scan and gc thread
- optimize pending paths to pending ids
| std::set<std::string> paths; | ||
| paths.insert(path); | ||
| if (!is_valid) { | ||
| LOG(WARNING) << "unknow path:" << path; |
There was a problem hiding this comment.
| LOG(WARNING) << "unknow path:" << path; | |
| LOG(WARNING) << "unknown path:" << path; |
| _remove_check_paths_no_lock(paths); | ||
| continue; | ||
| } else { | ||
| if (tablet_id >0 && schema_hash >0) { |
There was a problem hiding this comment.
| if (tablet_id >0 && schema_hash >0) { | |
| if (tablet_id > 0 && schema_hash > 0) { |
|
|
||
| bool TabletManager::get_rowset_id_from_path(const std::string& path, RowsetId* rowset_id) { | ||
| std::string pattern = "/data/\\d+/\\d+/\\d+/(\\d+)_.*"; | ||
| std::regex rgx (pattern.c_str()); |
There was a problem hiding this comment.
You should make this static to avoid constructing this multiple times.
| RowsetId rowset_id = 0; | ||
| RETURN_NOT_OK(_tablet->next_rowset_id(&rowset_id)); | ||
| RowsetWriterContextBuilder context_builder; | ||
| context_builder.set_rowset_id(rowset_id) |
There was a problem hiding this comment.
Build should have default value, otherwise we can create class instantly.
|
|
||
| void remove_pending_ids(const std::string& id); | ||
|
|
||
| void perform_path_gc(); |
There was a problem hiding this comment.
You should add comment about these two functions, and their relations
| return _pending_path_ids.find(id) != _pending_path_ids.end(); | ||
| } | ||
|
|
||
| void DataDir::_remove_check_paths_no_lock(const std::set<std::string> paths) { |
There was a problem hiding this comment.
Arguments should be reference
| } | ||
|
|
||
| // path consumer | ||
| void DataDir::perform_path_gc() { |
There was a problem hiding this comment.
You should add unit test for this two function
There was a problem hiding this comment.
I am trying to fix the unit tests and add the unit test case for these two function later
| std::string path = *path_iter; | ||
| TTabletId tablet_id = -1; | ||
| TSchemaHash schema_hash = -1; | ||
| bool is_valid = StorageEngine::instance()->tablet_manager()->get_tablet_id_and_schema_hash_from_path(path, |
There was a problem hiding this comment.
Instead of using instance(), you'd better pass tablet_manager to this class
| if (path.find(data_dir_path) != std::string::npos) { | ||
| std::string pattern = data_dir_path + "/data/\\d+/(\\d+)/?(\\d+)?"; | ||
| std::regex rgx (pattern.c_str()); | ||
| static std::string pattern = data_dir_path + "/data/\\d+/(\\d+)/?(\\d+)?"; |
There was a problem hiding this comment.
you can't use static here, it would lead a bug
|
|
||
| class RowsetWriterContextBuilder { | ||
| public: | ||
| RowsetWriterContextBuilder() { |
There was a problem hiding this comment.
I think this builder is useless, RowsetWriterContext is enough
|
|
||
| // path producer | ||
| void DataDir::perform_path_scan() { | ||
| std::unique_lock<std::mutex> lck(_check_path_mutex); |
There was a problem hiding this comment.
{
unique_lock
}
cv.notify_one()
| _scanned = false; | ||
| LOG(INFO) << "start to path gc."; | ||
| int counter = 0; | ||
| for (auto path_iter = _all_check_paths.begin(); path_iter != _all_check_paths.end(); ++path_iter) { |
There was a problem hiding this comment.
| for (auto path_iter = _all_check_paths.begin(); path_iter != _all_check_paths.end(); ++path_iter) { | |
| for (auto& path : _all_check_paths) { |
| // path producer | ||
| void DataDir::perform_path_scan() { | ||
| { | ||
| std::unique_lock<std::mutex> lck(_check_path_mutex); |
There was a problem hiding this comment.
if (_all_check_paths.size() > 0) {
return ;
}
4d42b66 to
7f35610
Compare